/**************************************************
File: jaere_oil_learning.do
Author: Peter Maniloff, maniloff@mines.edu
Last Edit: 10/16/2018
Purpose: This code runs the analyses for 
	"Can Learning Explain Deterrence? Evidence from Oil & Gas Production"
	Input file "PA inspections.csv" is from the State of PA
	Run straight through to replicate results from the paper
*************************************************/ 


 
 

graph drop _all
global data "<your path here>"

clear

insheet using "$data\PA inspections.csv", clear

keep if facility_type == "Oil & Gas Location"
drop if site_id == "NA"
gen isComplaint =  strpos(inspection_type , "Complaint")
//drop if the inspection type occurs < 1% of the time.  Asbestos, aerial, etc.  
egen num_insp_type = count(1), by(inspection_type)
drop if num_insp_type < 0.01 * _N
egen operator_id = group(operator )
rename inspection_date inspection_date_str
gen inspection_date = date(inspection_date_str, "MDY")
gen moy = month(inspection_date)
gen found_violation = (inspection_result_description == "Violation(s) Noted" || inspection_result_description == "Viol(s) Noted &  Immediately Corrected")
tab found_violation 
replace found_violation = . if !found_violation & inspection_result_description != "No Violations Noted"
tab found_violation 
destring api_permit, ignore("-") gen(api)

gen int year = yofd(inspection_date)
gen byte month = mofd(inspection_date)


egen api = group(api_permit )

duplicates drop site_id inspection_date, force 

merge m:1 county year using "$data\county_pop_for_merge.dta", gen(_popmerge) keep(master match)
gen CountyName = county +" County"
merge m:1 CountyName using "C:\Users\maniloff\Dropbox\inspections learning deterrence\data\county data\ICPSR_20660\DS0001\20660-0001-Data_PA_only.dta", gen(_icpsrmerge) keep(master match)
drop Births02 Births03 Births04 Births05 Deaths4_7_00 Deaths01 Deaths02 Deaths03 Deaths04 Deaths05 IntlMig4_7_00 IntlMig01 IntlMig02 IntlMig03 IntlMig04 IntlMig05 IntMig4_7_00 IntMig01 IntMig02 IntMig03 IntMig04 IntMig05 Residual4_7_00 Residual01 Residual02 Residual03 Residual04 Residual05 GQPopBase00 GQPop00 GQPop01 GQPop02 GQPop03 GQPop04 GQPop05 HU_Cen00 HU_Base00 HU00 HU01 HU02 HU03 HU04 HU05 HUGr4_00_05 HUGr00_05 Pop0_4_05 Pop5_13_05 Pop14_17_05 Pop15_44_05 Pop16plus05 Pop18_24_05 Pop18plus05 Pop45_64_05 Pop65plus05 Pop85plus05 Pct0_14_05 Pct15_64_05 Pct65plus05 Pct85plus05 Male0_4_05 Male5_13_05 Male14_17_05 Male15_44_05 Male16plus05 Male18_24_05 Male18plus05 Male45_64_05 Male65plus05 Male85plus05 Male05 Fmale0_4_05 Fmale5_13_05 Fmale14_17_05 Fmale15_44_05 Fmale16plus05 Fmale18_24_05 Fmale18plus05 Fmale45_64_05 Fmale65plus05 Fmale85plus05 Fmale05 SexRatio05 MedianAge05 Male_MdAge05 Fmale_MdAge05 White05 Black05 AIAN05 Asian05 NHOPI05 TwoPlus05 PctWhite05 PctBlack05 PctAIAN05 PctAsian05 PctNHOPI05 PctTwoPlus05 NH_White05 NH_Black05 NH_AIAN05 NH_Asian05 NH_NHOPI05 NH_2plus05 NH05 PctNH05 H_White05 H_Black05 H_AIAN05 H_Asian05 H_NHOPI05 H_2plus05 H05 PctH05 Male_White05 Male_Black05 Male_AIAN05 Male_Asian05 Male_NHOPI05 Male_2plus05 Male_NH_White05 Male_NH_Black05 Male_NH_AIAN05 Male_NH_Asian05 Male_NH_NHOPI05 Male_NH_2plus05 Male_NH05 Male_H_White05 Male_H_Black05 Male_H_AIAN05 Male_H_Asian05 Male_H_NHOPI05 Male_H_2plus05 Male_H05 Fmale_White05 Fmale_Black05 Fmale_AIAN05 Fmale_Asian05 Fmale_NHOPI05 Fmale_2plus05 Fmale_NH_White05 Fmale_NH_Black05 Fmale_NH_AIAN05 Fmale_NH_Asian05 Fmale_NH_NHOPI05 Fmale_NH_2plus05 Fmale_NH05 Fmale_H_White05 Fmale_H_Black05 Fmale_H_AIAN05 Fmale_H_Asian05 Fmale_H_NHOPI05 Fmale_H_2plus05 Fmale_H05
drop CA05N0010_05 CA05N0020_05 CA05N0030_05 CA05N0035_05 CA05N0036_05 CA05N0037_05 CA05N0038_05 CA05N0042_05 CA05N0045_05 CA05N0046_05 CA05N0047_05 CA05N0050_05 CA05N0060_05 CA05N0061_05 CA05N0062_05 CA05N0070_05 CA05N0072_05 CA05N0081_05 CA05N0082_05 CA05N0090_05 CA05N0101_05 CA05N0102_05 CA05N0103_05 CA05N0104_05 CA05N0200_05 CA05N0300_05 CA05N0400_05 CA05N0500_05 CA05N0600_05 CA05N0700_05 CA05N0800_05 CA05N0900_05 CA05N1000_05 CA05N1100_05 CA05N1200_05 CA05N1300_05 CA05N1400_05 CA05N1500_05 CA05N1600_05 CA05N1700_05 CA05N1800_05 CA05N1900_05 CA05N2000_05 CA05N2001_05 CA05N2002_05 CA05N2010_05 CA05N2011_05 CA05N2012_05 CA06N0001_05 CA06N0005_05 CA06N0006_05 CA06N0007_05 CA06N0008_05 CA06N0009_05 CA06N0081_05 CA06N0082_05 CA06N0090_05 CA06N0101_05 CA06N0102_05 CA06N0103_05 CA06N0104_05 CA06N0200_05 CA06N0300_05 CA06N0400_05 CA06N0500_05 CA06N0600_05 CA06N0700_05 CA06N0800_05 CA06N0900_05 CA06N1000_05 CA06N1100_05 CA06N1200_05 CA06N1300_05 CA06N1400_05 CA06N1500_05 CA06N1600_05 CA06N1700_05 CA06N1800_05 CA06N1900_05 CA06N2000_05 CA06N2001_05 CA06N2002_05 CA06N2010_05 CA06N2011_05 CA06N2012_05 CA25N0010_05 CA25N0020_05 CA25N0040_05 CA25N0050_05 CA25N0060_05 CA25N0070_05 CA25N0080_05 CA25N0090_05 CA25N0100_05 CA25N0200_05 CA25N0300_05 CA25N0400_05 CA25N0500_05 CA25N0600_05 CA25N0700_05 CA25N0800_05 CA25N0900_05 CA25N1000_05 CA25N1100_05 CA25N1200_05 CA25N1300_05 CA25N1400_05 CA25N1500_05 CA25N1600_05 CA25N1700_05 CA25N1800_05 CA25N1900_05 CA25N2000_05 CA25N2001_05 CA25N2002_05 CA25N2010_05 CA25N2011_05 CA25N2012_05 CA30_120_05 CA30_130_05 CA30_140_05 CA30_150_05 CA30_160_05 CA30_170_05 CA30_290_05 CA30_300_05 CA30_310_05 CA35_010_05 CA35_020_05 CA35_030_05 CA35_040_05 CA35_050_05 CA35_090_05 CA35_100_05 CA35_110_05 CA35_111_05 CA35_113_05 CA35_114_05 CA35_120_05 CA35_130_05 CA35_140_05 CA35_150_05 CA35_160_05 CA35_170_05 CA35_180_05 CA35_190_05 CA35_200_05 CA35_210_05 CA35_220_05 CA35_230_05 CA35_240_05 CA35_250_05 CA35_260_05 CA35_270_05 CA35_280_05 CA35_290_05 CA35_300_05 CA35_310_05 CA35_320_05 CA35_330_05 CA35_340_05
drop CFFR_DR04 CFFR_DO04 CFFR_DX04 CFFR_GG04 CFFR_PC04 CFFR_SW04 CFFR_Dir04 CFFR_Def04 CFFR_NDef04 CFFR_DL04 CFFR_GL04 CFFR_II04 CFFR_Oth04 PropTax02 PropTaxPerCap02 LocTax02 LocTaxPerCap02 LocRev02 LocExp02 CrimePop04 IdxCrime04 M_IdxCrime04 Muder04 Rape04 Robbery04 Assault04 Burglary04 Larceny04 MVTheft04 Arson04 CrimeRate04
drop P1Bldg05 P1Units05 P1Cost05 P2Bldg05 P2Units05 P2Cost05 P3Bldg05 P3Units05 P3Cost05 P5Bldg05 P5Units05 P5Cost05 P_Bldg05 P_Units05 P_Cost05 P1BldgR05 P1UnitsR05 P1CostR05 P2BldgR05 P2UnitsR05 P2CostR05 P3BldgR05 P3UnitsR05 P3CostR05 P5BldgR05 P5UnitsR05 P5CostR05 P_BldgR05 P_UnitsR05 P_CostR05 P_Miss05 P_HPSA_Cty07 P_HPSA_Geo07 P_HPSA_Pop07 P_HPSA_Oth07 M_HPSA_Cty07 M_HPSA_Geo07 M_HPSA_Pop07 M_HPSA_Oth07 D_HPSA_Cty07 D_HPSA_Geo07 D_HPSA_Pop07 D_HPSA_Oth07 Mcare_Agd03 McareA_Agd03 McareB_Agd03 Mcare_Dsb03 McareA_Dsb03 McareB_Dsb03 Mcare03 McareA03 McareB03
drop LN_PctWater JanTempZ JanSunZ JulTempZ JulHumidZ TypographyZ WaterZ ClimateZone WarmHumid KoppenClass ClimateRegion Pop01 Pop02 Pop03 Pop04 Pop05 PopGr4_00_05 PopGr00_05 Births4_7_00 Births01 LF05 Emp05 Unemp05 UnempRate05 EconType04 HouseStrs04 LowEduc04 LowEmp04 PerstPov04 PopLoss04
drop CBSA_Title CSA_Title TotalVotes04-PctOther04 MetroDivTitle
compress

gen county_pop_density = (population/1000) / (LandArea*640)  //make it '000 people / acre
gen Metro = RuralUrban03 < 3
gen Adjacent = RuralUrban03 == 4 | RuralUrban03 == 6 | RuralUrban03 == 8


drop v1 moy operator inspection_date_str api_permit farm_name site_name facility_type inspection_category region  municipality inspection_result_description inspection_comment  violation_type violation_comment resolved_date resolution_reason_code_descripti enforcement_id enforcement_code_description date_executed enforcement_final_date enforcement_final_status penalty_final_date penalty_final_status_code_descri 
egen inspection_type_id = group(inspection_type )
drop inspection_type
drop violation_date violation_id
drop total_amount_collected
destring site_id, replace

gen byte isInspection = 1
duplicates drop site_id inspection_date, force

describe
gen byte district = 1 if strpos("Butler, Clarion, Crawford, Elk, Erie, Forest, Jefferson, Lawrence, Mckean, Mercer, Venango, Warren",county)
replace district = 2 if strpos("Allegheny, Armstrong, Beaver, Cambria, Fayette, Greene, Indiana, Somerset, Washington, Westmoreland",county)
replace district = 3 if missing(district)
egen byte county_id  = group(county)
drop county


destring penalty_amount , gen(fine) ignore("$,")
replace fine = 0 if missing(fine)
drop penalty_amount

drop Latitude Longitude WaterArea LandArea JanTemp JulTemp Amenity*


sort site_id inspection_date, stable
gen tmpt = _n
by site_id : egen mintmpt = min(tmpt)
gen site_inspection_num = tmpt - mintmpt + 1
replace site_inspection_num = . if missing(site_id )

sort county_id  inspection_date, stable
replace tmpt = _n
drop mintmpt 
by county_id  : egen mintmpt = min(tmpt)
gen county_inspection_num = tmpt - mintmpt 
drop tmpt mintmpt
gen general_deterrence_tally = county_inspection_num - site_inspection_num 



bysort api inspection_date: gen tmpt = _n
by api : egen mintmpt = min(tmpt )
gen ismintmpt = mintmpt == tmpt 
sort operator_id inspection_date
by operator_id : gen cum_num_wells = sum(ismintmpt )
sort operator_id district inspection_date
by operator_id district : gen cum_num_wells_dist = sum(ismintmpt ) 

sort operator_id  inspection_date


gen isFine = fine > 0 & !missing(fine)
sort operator_id inspection_date, stable //stable sorts because operator-date pairs aren't unique
by operator_id: gen cum_fines = sum(fine)
by operator_id: gen num_fines = sum(isFine)
gen avg_fine = cum_fines / num_fines
sum avg_fine
gen spec_fines_z = (cum_fines - r(mean) )/r(sd)
sort county_id  inspection_date, stable
by county_id: gen cum_county_fines = sum(fine)
by county_id: gen num_county_fines = sum(isFine)
gen avg_co_fine = cum_county_fines / num_county_fines
sort county_id operator_id inspection_date, stable
by county_id operator_id: gen oper_co_cum_fine = sum(fine)
by county_id operator_id: gen oper_co_num_fine = sum(isFine)
gen general_deterrence_fine = (cum_county_fines - oper_co_cum_fine) / (num_county_fines-oper_co_num_fine)
sum general_deterrence_fine
gen general_deterrence_fine_z = (general_deterrence_fine -  r(mean)) / r(sd) 
egen mean_fine = mean(fine) if isFine

sum general_deterrence_tally
gen general_deter_z = (general_deterrence_tally  -  r(mean)) / r(sd) 


label variable general_deter_z "General Inspection"
label variable spec_fines_z "Own Fine" 
label variable general_deterrence_fine_z "General Fine"


sum found_violation 



xtset site_id inspection_date , daily
compress
drop  CountyName StateName
describe
tsfill
xtset site_id inspection_date , daily

gen isUnconventional = unconventional == "Y" if !missing(unconventional)

replace isInspection = 0 if missing(isInspection )
by site_id (inspection_date): carryforward county_id operator_id  district isUnconventional county_pop_density Metro Adjacent PctWater  NonmetRec04, replace


xtset site_id inspection_date

drop year
gen year = yofd(inspection_date)
gen moy = month(inspection_date)
//Calculate your selection variable

gen inspection_year = yofd(inspection_date)
gen trend = inspection_year - 2000
sort operator_id inspection_date, stable
by operator_id: gen totfound = sum(found_violation )


egen int numCountyToday = total(isInspection ), by(county_id  inspection_date)
gen  byte inCountyToday = (numCountyToday - isInspection ) > 0 
replace numCountyToday = numCountyToday - isInspection



/*****************
ANALYSIS 
use the OLS to generate a predicted probability of inspection
lag the predicted probability
keep only inspections
IV for own inspections in core model

********************/


xtset operator_id
xtreg  isInspection numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04 i.moy i.year, fe
eststo fe_isInsp
predict ihat_fe, xbu




rename ihat_fe ihat


//Use ihat to calculate deterrence measures

egen int numVio = sum(found_violation ), by(site_id inspection_date)
duplicates drop site_id inspection_date, force
replace found_violation = numVio > 0 & !missing(numVio )

sort operator_id inspection_date
by operator_id: egen first_insp_date = min(inspection_date)
gen isFirstInspOper = inspection_date == first_insp_date 
bysort site_id: egen first_insp_date_site = min(inspection_date)
gen isFirstInspSite  = inspection_date == first_insp_date_site 
sort operator_id inspection_date
sort district operator_id inspection_date
by district operator_id: gen operator_inspection_num = sum(ihat)
gen operator_inspection_rate = operator_inspection_num / cum_num_wells_dist





xtset site_id inspection_date , daily
compress
sort district operator_id inspection_date
compress
keep if isInspection



//run the heckman to get the mills ratio
xtset site_id inspection_date , daily
compress
tsfill
xtset site_id inspection_date , daily

replace isInspection = 0 if missing(isInspection )
by site_id (inspection_date): carryforward  operator_id county_id isUnconventional county_pop_density Metro Adjacent PctWater  NonmetRec04, replace

xtset site_id inspection_date

drop year moy
gen year = yofd(inspection_date)
gen moy = month(inspection_date)
drop numCountyToday inCountyToday
egen int numCountyToday = total(isInspection ), by(county_id  inspection_date)
gen  byte inCountyToday = (numCountyToday - isInspection ) > 0 
replace numCountyToday = numCountyToday - inCountyToday

label variable NonmetRec04 "ERS Recreation County"
label variable PctWater "Percent Water"
label variable Metro "Metro Area"
label variable Adjacent "Metro Adjacent"
label variable county_pop_density "Pop Density (1000 People / Acre)"
label variable numCountyToday "Num Insp In County"
label variable inCountyToday "Any Insp In County"

/***************
Table 5
****************/

 
reg isInspection numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04 i.year, cluster(operator_id)
eststo ols_isInsp

probit isInspection numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04 i.year, vce(cluster operator_id)
eststo probit_isInsp
estpost margins, dydx(numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04)
eststo probit_isInsp_mfx

logit isInspection numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04 i.year, cluster(operator_id)
eststo logit_isInsp
estpost margins, dydx(numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04)
eststo logit_isInsp_mfx

esttab ols_isInsp logit_isInsp_mfx probit_isInsp_mfx, nogaps   se(4) b(4)  star(* 0.10 ** 0.05 *** 0.01) label mlabel("OLS" "Logit" "Probit")
esttab ols_isInsp logit_isInsp_mfx probit_isInsp_mfx, nogaps   se(4) b(4)  star(* 0.10 ** 0.05 *** 0.01) label mlabel("OLS" "Logit" "Probit") tex


 

//heckman selection:

//inspections by district

sort district operator_id inspection_date
by district operator_id: gen operator_inspection_num_district = sum(isInspection)
gen operator_inspection_num_dist_sm = operator_inspection_num_district/1000

sum operator_inspection_rate
gen specific_deter_z = (operator_inspection_rate - r(mean))/r(sd)

gen opern_specific_insp_district = specific_deter_z * operator_inspection_num_dist_sm
gen opern_gen_insp_district = general_deter_z  * operator_inspection_num_dist_sm
gen opern_spec_fine_district = spec_fines_z  * operator_inspection_num_dist_sm
gen opern_gen_fine_district = general_deterrence_fine_z   * operator_inspection_num_dist_sm

compress
//get the mills ratio
//don't use these results from the second stage cause you want to bootstrap the se's
/******************
Table 14
*******************/
heckman found_violation    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.year i.inspection_type isFirstInspSite   , select(isInspection = numCountyToday inCountyToday county_pop_density Metro Adjacent  PctWater  NonmetRec04) twostep mills(millsratio) first  
eststo heckit_results
esttab heckit_results, nogaps   se(4) b(4)  star(* 0.10 ** 0.05 *** 0.01)
esttab heckit_results, nogaps   se(4) b(4)  star(* 0.10 ** 0.05 *** 0.01) tex


keep if isInspection
xtset, clear
save  "$data\cleaned_data.dta", replace


use  "$data\cleaned_data.dta", clear

drop violation_code //this is in the cleaned_data so you can make Table 6 below
gen found_violation100 = found_violation*100
sum operator_inspection_num_dist_sm   , de
xtset operator_id
xi: xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite millsratio  if isInspection, fe
gen isEstSample = e(sample)



/******************
Table 2
*******************/

label variable isFirstInspSite "Site First Inspection"
label variable isUnconventional "Fracking Well"
label variable site_inspection_num "Site Inspection Number"
egen oper_first_insp_date_district = min(inspection_date), by(operator_id district)
gen days_operating_district = inspection_date - oper_first_insp_date_district
gen oper_years_operating_district = days_operating_district / 365
gen years_spec_insp_district = specific_deter_z * oper_years_operating_district
gen years_gen_insp_district = general_deter_z  * oper_years_operating_district
gen years_spec_fine_district = spec_fines_z  * oper_years_operating_district
gen years_gen_fine_district = general_deterrence_fine_z   * oper_years_operating_district

sort district operator_id inspection_date
by district operator_id: gen oper_well_count_dist = sum(isFirstInspSite) / 1000
sum oper_well_count_dist, de
gen oper_well_max_dist = r(max)



gen finesum = fine if fine > 0
sum found_violation isFine fine finesum operator_inspection_num_dist_sm oper_years_operating_district isFirstInspSite site_inspection_num oper_well_count_dist 

estpost tabstat found_violation isFine finesum operator_inspection_num_dist_sm oper_years_operating_district oper_well_count_dist isFirstInspSite site_inspection_num  isUnconventional if isEstSample, statistics(mean sd count) columns(statistics)
esttab, cells("mean sd") tex

//you want percentiles of these for discussing results
sum operator_inspection_num_dist_sm oper_years_operating_district, de
sum finesum, de

/******************
Table 3
*******************/



xtset operator_id  
reg found_violation100    isUnconventional site_inspection_num  general_deter_z  general_deterrence_fine_z    i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , cluster(operator_id)
estimates store ols_nospecific_nolearning

reg found_violation100    isUnconventional site_inspection_num  general_deter_z  general_deterrence_fine_z  spec_fines_z specific_deter_z   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , cluster(operator_id)		
eststo ols_nolearning

xtset operator_id  	
xtreg found_violation100    	isUnconventional site_inspection_num  general_deter_z   general_deterrence_fine_z    i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
estimates store fe_nospecific_nolearning

xtreg found_violation100    	isUnconventional site_inspection_num general_deter_z specific_deter_z   spec_fines_z general_deterrence_fine_z    i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
estimates store fe_nolearning

xtset api
xtreg found_violation100    isUnconventional site_inspection_num general_deter_z specific_deter_z   spec_fines_z general_deterrence_fine_z    i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
estimates store api_nolearning


label variable specific_deter_z "Own Inspection"
esttab ols_nospecific_nolearning ols_nolearning fe_nospecific_nolearning fe_nolearning api_nolearning, ///
keep(general_deter_z specific_deter_z)  b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps scalar(r2 r2_w rho)



esttab ols_nospecific_nolearning ols_nolearning fe_nospecific_nolearning fe_nolearning api_nolearning, ///
keep(general_deter_z specific_deter_z)  b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps ///
scalar(r2 r2_w rho) tex

/******************
Table 4
Also produce the graphs and Tables that characterize the LPM's fit
Figure 2
Table 14
Also produce Table D1,2 - the other coeffs table

*******************/



//LPM's:
//Operator FE's
xtset operator_id
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
eststo dist_insp_oper

//characterize the fit
predict yhat, xbu
replace yhat = yhat/100

count if !missing(yhat)
count if !missing(yhat) & yhat < 0
count if !missing(yhat) & yhat > 1
count if !missing(yhat) & yhat < -.1
sum yhat if found_violation == 1,de
sum yhat if found_violation == 0,de
kdensity yhat if found_violation ==  0 , addplot(kdensity yhat if found_violation ==  1) lpattern(dash) legend(off)  title("Inspections") ytitle("Firm Fixed Effects") xtitle("") graphregion(fcolor(white))  name(oper_dist_insp_yhat)
gen isOutsideRange = yhat < 0 | yhat > 1 & !missing(yhat)
sum isOutsideRange if found_violation == 1 
sum isOutsideRange if found_violation == 0 
drop isOutsideRange
drop yhat


xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
eststo dist_time_oper
predict yhat, xbu
replace yhat = yhat/100
count if !missing(yhat)
count if !missing(yhat) & yhat < 0
count if !missing(yhat) & yhat > 1
count if !missing(yhat) & yhat < -.1
sum yhat if found_violation == 1,de
sum yhat if found_violation == 0,de
kdensity yhat if found_violation ==  0 , addplot(kdensity yhat if found_violation ==  1) lpattern(dash) legend(off) title("Time") xtitle("") graphregion(fcolor(white))  name(oper_dist_time_yhat)
gen isOutsideRange = yhat < 0 | yhat > 1 & !missing(yhat)
sum isOutsideRange if found_violation == 1 
sum isOutsideRange if found_violation == 0 
drop isOutsideRange
drop yhat


//Well FE's
xtset api
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , fe robust
eststo dist_insp_well
eststo api_year_insp_dist
predict yhat, xbu
replace yhat = yhat/100
count if !missing(yhat)
count if !missing(yhat) & yhat < 0
count if !missing(yhat) & yhat > 1
count if !missing(yhat) & yhat < -.1
sum yhat if found_violation == 1,de
sum yhat if found_violation == 0,de
kdensity yhat if found_violation ==  0 , addplot(kdensity yhat if found_violation ==  1) lpattern(dash) legend(off) title("") xtitle("") ytitle("Well Fixed Effects") graphregion(fcolor(white))  name(well_dist_insp_yhat)
gen isOutsideRange = yhat < 0 | yhat > 1 & !missing(yhat)
sum isOutsideRange if found_violation == 1 
sum isOutsideRange if found_violation == 0 
drop isOutsideRange
drop yhat


xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , fe robust
eststo dist_time_well
eststo api_year_time_dist
predict yhat, xbu
replace yhat = yhat/100
count if !missing(yhat)
count if !missing(yhat) & yhat < 0
count if !missing(yhat) & yhat > 1
count if !missing(yhat) & yhat < -.1
sum yhat if found_violation == 1,de
sum yhat if found_violation == 0,de
kdensity yhat if found_violation ==  0 , addplot(kdensity yhat if found_violation ==  1) lpattern(dash) legend(off) title("") xtitle("") graphregion(fcolor(white))  name(well_dist_time_yhat)
gen isOutsideRange = yhat < 0 | yhat > 1 & !missing(yhat)
sum isOutsideRange if found_violation == 1 
sum isOutsideRange if found_violation == 0 
drop isOutsideRange
drop yhat

//make the interaction term 1-tailed by hand
//Table 4

esttab dist_insp_oper dist_insp_well dist_time_oper  dist_time_well, ///
	keep(general_deter_z  opern_gen_insp_district years_gen_insp_district )  ///
	 se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps sca(r2_w rho )
	 
esttab dist_insp_oper dist_insp_well dist_time_oper  dist_time_well, ///
	keep(general_deter_z  opern_gen_insp_district years_gen_insp_district )  ///
	b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps sca(r2_w rho ) tex 


	 
//Table D1, D2
esttab dist_insp_oper dist_insp_well dist_time_oper  dist_time_well, ///
	drop(general_deter_z  opern_gen_insp_district years_gen_insp_district )  ///
	b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps sca(rho r2_w)
esttab dist_insp_oper dist_insp_well dist_time_oper  dist_time_well, ///
	drop(general_deter_z  opern_gen_insp_district years_gen_insp_district )  ///
	b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps sca(rho r2_w) tex 
 
	
//Figure 2
//LPM fit graph
graph combine  oper_dist_insp_yhat  oper_dist_time_yhat  well_dist_insp_yhat  well_dist_time_yhat , xcommon graphregion(fcolor(white))

//Table C1 from sum statements above
	
	




/******************
Table 8
Note - using areg instead of xtreg to make the R^2's comparable to the literature r^2's
also replace deterrence with -1*deterrence because of the convention of the psacalc command

*******************/

gen neg_gen_det_z = -1*general_deter_z
gen neg_spec_det_z = -1*specific_deter_z
xtset operator_id
xi: areg found_violation100    isUnconventional site_inspection_num neg_spec_det_z neg_gen_det_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if isEstSample, robust absorb(operator_id)
psacalc delta neg_spec_det_z, rmax(0.35)
psacalc delta neg_gen_det_z, rmax(0.35)


xi: areg found_violation100    isUnconventional site_inspection_num neg_spec_det_z neg_gen_det_z  spec_fines_z general_deterrence_fine_z years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample , robust absorb(operator_id)
psacalc delta neg_spec_det_z, rmax(0.35)
psacalc delta neg_gen_det_z, rmax(0.35)





/******************
Table B1
*******************/


sort operator_id inspection_date
gen byte ones = 1
by operator_id: gen run_num = sum(ones)
gen avg_fine_filled = cum_fines / run_num
sum avg_fine_filled
gen spec_fines_z_filled = (avg_fine_filled - r(mean) )/r(sd)

sort county_id  inspection_date
by county_id: gen num_county_run_num = sum(ones)
sort county_id operator_id inspection_date
by county_id operator_id: gen oper_co_run_num = sum(ones)
gen general_deterrence_fine_filled = (cum_county_fines - oper_co_cum_fine) / (num_county_run_num-oper_co_run_num)
sum general_deterrence_fine_filled
gen general_deterrence_fine_z_filled = (general_deterrence_fine_filled -  r(mean)) / r(sd) 

gen opern_spec_fine_district_filled = spec_fines_z_filled  * operator_inspection_num_dist_sm
gen opern_gen_fine_district_filled = general_deterrence_fine_z_filled   * operator_inspection_num_dist_sm
gen years_spec_fine_district_filled  = spec_fines_z_filled   * oper_years_operating_district
gen years_gen_fine_district_filled  = general_deterrence_fine_z_filled    * oper_years_operating_district

xtset operator_id
xi: xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z_filled  general_deterrence_fine_z_filled  opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district_filled    opern_gen_fine_district_filled    operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite    , fe robust // vce(bootstrap, r(100) seed(101))   cluster(operator_id)
eststo learn_insp_dist_filled
xi: xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z_filled general_deterrence_fine_z_filled    years_spec_insp_district years_gen_insp_district years_spec_fine_district_filled years_gen_fine_district_filled oper_years_operating_district   i.moy   i.trend i.inspection_type i.county_id isFirstInspSite    , fe robust // vce(bootstrap, r(100) seed(101)  )  cluster(operator_id)
eststo learn_years_dist_filled
xtset api
xi: xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z_filled  general_deterrence_fine_z_filled  opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district_filled    opern_gen_fine_district_filled    operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite    , fe robust // vce(bootstrap, r(100) seed(101))   cluster(operator_id)
eststo learn_insp_dist_filled_api
xi: xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z_filled general_deterrence_fine_z_filled    years_spec_insp_district years_gen_insp_district years_spec_fine_district_filled years_gen_fine_district_filled oper_years_operating_district   i.moy   i.trend i.inspection_type i.county_id isFirstInspSite    , fe robust // vce(bootstrap, r(100) seed(101)  )  cluster(operator_id)
eststo learn_years_dist_filled_api

label variable spec_fines_z_filled "Own Fine"
label variable general_deterrence_fine_z_filled "General Fine"

esttab learn_insp_dist_filled learn_insp_dist_filled_api learn_years_dist_filled learn_years_dist_filled_api, keep(general_deter_z opern_gen_insp_district years_gen_insp_district spec_fines_z_filled general_deterrence_fine_z_filled opern_spec_fine_district_filled years_spec_fine_district_filled opern_gen_fine_district_filled years_gen_fine_district_filled) nogaps se star(* 0.10 ** 0.05 *** 0.01) label 
esttab learn_insp_dist_filled learn_insp_dist_filled_api learn_years_dist_filled learn_years_dist_filled_api, keep(general_deter_z opern_gen_insp_district years_gen_insp_district spec_fines_z_filled general_deterrence_fine_z_filled opern_spec_fine_district_filled years_spec_fine_district_filled opern_gen_fine_district_filled years_gen_fine_district_filled) nogaps se star(* 0.10 ** 0.05 *** 0.01) label  tex








/******************
Table E1
The lagged and short-term models are below
*******************/

//no indiv effect
xi: reg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , cluster(operator_id)
eststo nofe_insp_dist
xi: reg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , cluster(operator_id)
eststo nofe_time_dist

//firm size
gen wells_spec_insp_dist = specific_deter_z * oper_well_count_dist
gen wells_gen_insp_dist = general_deter_z  * oper_well_count_dist
gen wells_spec_fine_dist = spec_fines_z  * oper_well_count_dist
gen wells_gen_fine_dist = general_deterrence_fine_z   * oper_well_count_dist

gen wells_for_graph_dist = _n/ 1000
replace wells_for_graph_dist = . if wells_for_graph_dist > oper_well_max_dist

label variable wells_gen_insp_dist "Gen Insp * District Wellcount"

xtset operator_id
xi: reg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z wells_spec_insp_dist wells_gen_insp_dist wells_spec_fine_dist wells_gen_fine_dist oper_well_count_dist  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , cluster(operator_id)
eststo firm_size_insp_dist

esttab nofe_insp_dist nofe_time_dist firm_size_insp_dist, ///
	keep(general_deter_z  opern_gen_insp_district  years_gen_insp_district wells_gen_insp_dist) ///
	nogaps se star(* 0.10 ** 0.05 *** 0.01)

//logit
xtset operator_id
set matsize 7500
capture noisily xtgee  found_violation   isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample ,  family(binomial) link(logit) corr(exchangeable) tol(1e-2) 
estpost margins, dydx(general_deter_z opern_gen_insp_district )
eststo logit_insp_oper_gee

capture noisily logit  found_violation   isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z    years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district   i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample  ,
matrix b = e(b)
capture noisily xtgee  found_violation   isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z    years_spec_insp_district years_gen_insp_district years_spec_fine_district years_gen_fine_district oper_years_operating_district   i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isEstSample  ,  family(binomial) link(logit) corr(exchangeable)  tol(1.5e-2)  from(b, skip)
estpost margins, dydx(general_deter_z years_gen_insp_district )
eststo logit_years_oper_gee

esttab logit_insp_oper_gee logit_years_oper_gee  , ////
	keep(general_deter_z  opern_gen_insp_district years_gen_insp_district )  ///
	b(5) se(5) star(* 0.20 ** 0.1 *** 0.02) label nogaps sca(rho r2_w)   replace


/****************
Table F1-3
*****************/


// inspections in county
sort county_id operator_id inspection_date
by county_id operator_id: gen operator_inspection_num_county = sum(isInspection)
gen operator_inspection_num_co_sm = operator_inspection_num_county/1000
gen opern_specific_insp_county = specific_deter_z * operator_inspection_num_co_sm
gen opern_gen_insp_county = general_deter_z  * operator_inspection_num_co_sm
gen opern_spec_fine_county = spec_fines_z  * operator_inspection_num_co_sm
gen opern_gen_fine_county = general_deterrence_fine_z   * operator_inspection_num_co_sm
xtset operator_id
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_county   opern_gen_insp_county  opern_spec_fine_county   opern_gen_fine_county  operator_inspection_num_co_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if  isEstSample,  fe robust
eststo insp_co_oper
xtset api 
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_county   opern_gen_insp_county  opern_spec_fine_county   opern_gen_fine_county  operator_inspection_num_co_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if  isEstSample,  fe robust
eststo insp_co_api

//time the operator has been operating in county
egen oper_first_insp_date = min(inspection_date), by(county_id operator_id)
gen days_operating = inspection_date - oper_first_insp_date
gen oper_years_operating = days_operating / 365
gen years_spec_insp = specific_deter_z * oper_years_operating
gen years_gen_insp = general_deter_z  * oper_years_operating
gen years_spec_fine = spec_fines_z  * oper_years_operating
gen years_gen_fine = general_deterrence_fine_z   * oper_years_operating
xtset operator_id
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z years_spec_insp years_gen_insp years_spec_fine years_gen_fine oper_years_operating     i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample , fe robust
eststo time_co_oper
xtset api
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z years_spec_insp years_gen_insp years_spec_fine years_gen_fine oper_years_operating     i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample , fe robust
eststo time_co_api

//number of inspections in state
sort  operator_id inspection_date
by  operator_id: gen operator_inspection_num_pa_sm = sum(isInspection)
replace operator_inspection_num_pa_sm = operator_inspection_num_pa_sm / 1000 
gen opern_specific_insp = specific_deter_z * operator_inspection_num_pa_sm
gen opern_gen_insp = general_deter_z  * operator_inspection_num_pa_sm
gen opern_spec_fine = spec_fines_z  * operator_inspection_num_pa_sm
gen opern_gen_fine = general_deterrence_fine_z   * operator_inspection_num_pa_sm
xtset operator_id
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp   opern_gen_insp   opern_spec_fine     operator_inspection_num_pa_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , fe robust
eststo insp_state_oper
xtset api
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp   opern_gen_insp   opern_spec_fine     operator_inspection_num_pa_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite if isEstSample  , fe robust
eststo insp_state_api

//time in state
egen oper_first_insp_date_pa = min(inspection_date), by(operator_id)
gen days_operating_pa = inspection_date - oper_first_insp_date_pa
gen oper_years_operating_pa = days_operating_pa / 365
gen years_spec_insp_pa = specific_deter_z * oper_years_operating_pa
gen years_gen_insp_pa = general_deter_z  * oper_years_operating_pa
gen years_spec_fine_pa = spec_fines_z  * oper_years_operating_pa
gen years_gen_fine_pa = general_deterrence_fine_z   * oper_years_operating_pa
xtset operator_id
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z   years_spec_insp_pa years_gen_insp_pa years_spec_fine_pa years_gen_fine_pa oper_years_operating_pa  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if isEstSample , fe robust
eststo time_state_oper
xtset api
xtreg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z   years_spec_insp_pa years_gen_insp_pa years_spec_fine_pa years_gen_fine_pa oper_years_operating_pa  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if isEstSample , fe robust
eststo time_state_api

//summary stats table
eststo sumstatsspace: estpost sum  operator_inspection_num_co_sm operator_inspection_num_pa_sm oper_years_operating  oper_years_operating_pa 
esttab sumstatsspace, cell("mean(fmt(3)) sd")
esttab sumstatsspace, cell("mean(fmt(3)) sd") tex

//regression results tables
esttab insp_co_oper insp_co_api time_co_oper time_co_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) label nogaps

esttab insp_state_oper insp_state_api time_state_oper time_state_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) label nogaps
	
esttab insp_co_oper insp_co_api time_co_oper time_co_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) label nogaps tex

esttab insp_state_oper insp_state_api time_state_oper time_state_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.10 ** 0.05 *** 0.01) label nogaps tex
	
	
//check the stars
esttab insp_co_oper insp_co_api time_co_oper time_co_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.20 ** 0.1 *** 0.02) label nogaps

esttab insp_state_oper insp_state_api time_state_oper time_state_api, ///
	keep(general_deter_z *gen_insp* ) se(3) b(3) ///
	star(* 0.20 ** 0.1 *** 0.02) label nogaps

	
	
	
	
	
	
	
	
	
	
	
/**********************
Table E1
Short term analyses
***********************/

	
use "$data\cleaned_data.dta", clear
drop violation_code

egen int firstSiteInspDate = min(inspection_date), by(site_id )
gen byte isFirstSiteInsp = inspection_date == firstSiteInspDate
egen int firstOperDate = min(inspection_date), by(operator_id)
gen years_operating = (inspection_date - firstOperDate) / 365 

egen oper_first_insp_date_district = min(inspection_date), by(operator_id district)
gen days_operating_district = inspection_date - oper_first_insp_date_district
gen oper_years_operating_district = days_operating_district / 365

egen oper_first_insp_date_co = min(inspection_date), by(operator_id county_id)
gen days_operating_co = inspection_date - oper_first_insp_date_co
gen oper_years_operating_co = days_operating_co / 365
sort county_id operator_id inspection_date
gen ones = 1
by county_id operator_id: gen operator_inspection_num_co = sum(ones)
drop ones
gen operator_inspection_num_co_sm = operator_inspection_num_co/1000


expand 2 if isFirstSiteInsp , gen(isExp)
replace inspection_date = inspection_date - 731 if isExp
expand 2 , gen(isl60Exp)
replace inspection_date = inspection_date - 60 if isl60Exp

egen int site_panel_id = group(site_id)
drop site_id
duplicates tag site_panel_id inspection_date, gen(duptag)
drop if duptag & isl60Exp
drop duptag

xtset site_panel_id inspection_date

drop avg_fine avg_co_fine
drop cum_*
drop oper_co_cum_fine general_deterrence_fine  mean_fine
drop inCountyToday numCountyToday millsratio _est_heckit_results
drop operator_inspection_rate num_insp_type fine tmpt
drop unconventional ID State County FIPS Division Region CBSA CBSA_Status MetroDiv CSA PopCen00 PopBase00 Pop00 NonmetRec04 Retirement04 UrbanInf03 RuralUrban03 CA05N0071_05 JanSun JulHumid Typography
tsfill
replace isInspection = 0 if missing(isInspection)
bysort operator_id inspection_date : gen int num_oper_wells = sum(isFirstInspSite)+1
bysort county_id inspection_date: gen int num_county_wells = sum(isFirstInspSite)+1
bysort county_id operator_id inspection_date: gen int num_co_oper_wells = sum(isFirstInspSite)+1


carryforward operator_inspection_num, replace
carryforward county_id, replace


sort operator_id inspection_date
by operator_id  : gen int oper_insp_num = sum(isInsp )  
sort county_id operator_id inspection_date
by county_id operator_id : gen int oper_co_insp_num = sum(isInsp )

sort county_id inspection_date
by county_id  : gen int co_insp_num = sum(isInsp )
gen int co_other_insp = co_insp_num - oper_co_insp_num
gen int num_co_other_wells = num_county_wells - num_co_oper_wells

/*****************************
generate your time-specific measures and interaction terms
*****************************/
xtset
gen oper_insp_num_sm = oper_insp_num / 1000


gen spec_insp_det365 = (oper_insp_num-l365.oper_insp_num) / num_oper_wells
gen gen_insp_det365 = (co_other_insp-l365.co_other_insp) / num_county_wells

gen spec_insp_det730 = (oper_insp_num-l730.oper_insp_num) / num_oper_wells
gen gen_insp_det730 = (co_other_insp-l730.co_other_insp) / num_county_wells



/*************************************
Generate your treatments
*************************************/
//Only use inspections for regression
keep if isInspection | l60.isInspection

gen found_violation100 = found_violation * 100
xtset operator_id

sum spec_insp_det365
gen spec_insp_det365_z = (spec_insp_det365-r(mean))/r(sd)
sum gen_insp_det365
gen gen_insp_det365_z = (gen_insp_det365-r(mean))/r(sd)
gen gen_insp_365_exp = oper_insp_num_sm*gen_insp_det365_z
gen spec_insp_365_exp = oper_insp_num_sm*spec_insp_det365_z
gen gen_insp_365_exp_dist = operator_inspection_num_dist_sm*gen_insp_det365_z
gen spec_insp_365_exp_dist = operator_inspection_num_dist_sm*spec_insp_det365_z
gen gen_insp_365_exp_co = operator_inspection_num_co_sm*gen_insp_det365_z
gen spec_insp_365_exp_co = operator_inspection_num_co_sm*spec_insp_det365_z

gen gen_insp_365_time = years_operating*gen_insp_det365_z
gen spec_insp_365_time = years_operating*spec_insp_det365_z
gen gen_insp_365_time_dist = oper_years_operating_district*gen_insp_det365_z
gen spec_insp_365_time_dist = oper_years_operating_district*spec_insp_det365_z
gen gen_insp_365_time_co = oper_years_operating_co*gen_insp_det365_z
gen spec_insp_365_time_co = oper_years_operating_co*spec_insp_det365_z


sum spec_insp_det730 
gen spec_insp_det730_z = (spec_insp_det730 -r(mean))/r(sd)
sum gen_insp_det730 
gen gen_insp_det730_z = (gen_insp_det730 -r(mean))/r(sd)
gen gen_insp_730_exp = oper_insp_num_sm*gen_insp_det730_z
gen spec_insp_730_exp = oper_insp_num_sm*spec_insp_det730_z
gen gen_insp_730_dist = operator_inspection_num_dist_sm*gen_insp_det730_z
gen spec_insp_730_dist = operator_inspection_num_dist_sm*spec_insp_det730_z
gen gen_insp_730_exp_dist = operator_inspection_num_dist_sm*gen_insp_det730_z
gen spec_insp_730_exp_dist = operator_inspection_num_dist_sm*spec_insp_det730_z
gen gen_insp_730_exp_co = operator_inspection_num_co_sm*gen_insp_det730_z
gen spec_insp_730_exp_co = operator_inspection_num_co_sm*spec_insp_det730_z

gen gen_insp_730_time = years_operating*gen_insp_det730_z
gen spec_insp_730_time = years_operating*spec_insp_det730_z
gen gen_insp_730_time_dist = oper_years_operating_district*gen_insp_det730_z
gen spec_insp_730_time_dist = oper_years_operating_district*spec_insp_det730_z
gen gen_insp_730_time_co = oper_years_operating_co*gen_insp_det730_z
gen spec_insp_730_time_co = oper_years_operating_co*spec_insp_det730_z



/*************************************
Run regressions.  
*************************************/
drop if isExp 
xtset, clear
reg found_violation100    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   if isInspection,
gen isEstSample = e(sample)

xtset operator_id
xtreg found_violation100 spec_insp_det365_z spec_insp_365_exp_dist gen_insp_det365_z gen_insp_365_exp_dist operator_inspection_num_dist_sm  isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if isEstSample, fe robust
eststo exp_dist_365
xtreg found_violation100 spec_insp_det365_z spec_insp_365_time_dist gen_insp_det365_z gen_insp_365_time_dist oper_years_operating_district  isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if isEstSample, fe robust
eststo time_dist_365


xtreg found_violation100 spec_insp_det730_z spec_insp_730_exp_dist gen_insp_det730_z gen_insp_730_exp_dist operator_inspection_num_dist_sm isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if isEstSample, fe robust
eststo exp_dist_730
xtreg found_violation100 spec_insp_det730_z spec_insp_730_time_dist gen_insp_det730_z gen_insp_730_time_dist oper_years_operating_district isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if isEstSample, fe robust
eststo time_dist_730

//short term w lags
xtset site_panel_id inspection_date
gen l60spec_insp_det365_z = l60.spec_insp_det365_z
gen l60spec_insp_365_exp_dist = l60.spec_insp_365_exp_dist
gen l60gen_insp_det365_z =l60.gen_insp_det365_z
gen l60gen_insp_365_exp_dist= l60.gen_insp_365_exp_dist

gen l60spec_insp_det730_z = l60.spec_insp_det730_z
gen l60spec_insp_730_exp_dist = l60.spec_insp_730_exp_dist
gen l60gen_insp_det730_z =l60.gen_insp_det730_z
gen l60gen_insp_730_exp_dist= l60.gen_insp_730_exp_dist

gen l60spec_insp_365_time_dist = l60.spec_insp_365_time_dist
gen l60gen_insp_365_time_dist= l60.gen_insp_365_time_dist
gen l60spec_insp_730_time_dist = l60.spec_insp_730_time_dist
gen l60gen_insp_730_time_dist= l60.gen_insp_730_time_dist


gen f60isEstSample = f60.isEstSample

xtset operator_id
xtreg found_violation100 l60spec_insp_det730_z l60spec_insp_730_exp_dist l60gen_insp_det730_z l60gen_insp_730_exp_dist operator_inspection_num_dist_sm  isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if f60isEstSample, fe robust
eststo exp_dist_730_l60
xtreg found_violation100 l60spec_insp_det730_z l60spec_insp_730_time_dist l60gen_insp_det730_z l60gen_insp_730_time_dist oper_years_operating_district isUnconventional i.year i.moy i.county_id i.inspection_type_id site_inspection_num isFirstInspSite if isEstSample, fe robust
eststo time_dist_730_l60


//short term results
//hand copy these into the robustness results table
esttab exp_dist_365 time_dist_365 , ///
	keep(*en_insp_det365_z *en_insp_365_*_dist  *en_insp_*_dist) ///
	se(3) b(3)  star(* 0.20 ** 0.1 **^^ 0.05 *** 0.02  ***^ 0.01)

esttab  exp_dist_730 time_dist_730, ///
	keep( *en_insp_det730_z *en_insp_*_dist) ///
	se(3) b(3)  star(* 0.20 ** 0.1 **^^ 0.05 *** 0.02  ***^ 0.01)

	
//short term lagged results
esttab   exp_dist_730_l60 time_dist_730_l60, ///
	keep( *en_insp_det730_z *en_insp_*_dist) ///
	se(3) b(3)  star(* 0.20 ** 0.1 **^^ 0.05 *** 0.02  ***^ 0.01)


/**********************
Table E1
lagged analyses
*********************/


use "$data\cleaned_data.dta", clear

keep if isInspection
drop specific_deter_z general_deter_z spec_fines_z general_deterrence_fine_z operator_inspection_num_district* opern_specific_insp_district opern_gen_insp_district  opern_spec_fine_district opern_gen_fine_district
drop cum_num_wells operator_inspection_rate
drop county_inspection_num site_inspection_num
drop general_deterrence_tally
drop  oper_co_cum_fine oper_co_num_fine cum_county_fines num_county_fines
drop general_deterrence_fine cum_fines  operator_inspection_num_dist_sm

egen int firstSiteInspDate = min(inspection_date), by(site_id )
gen byte isFirstSiteInsp = inspection_date == firstSiteInspDate

expand 2 if isFirstSiteInsp , gen(isExp1)
replace inspection_date = inspection_date - 731 if isExp1

egen int site_panel_id = group(site_id)
drop site_id
xtset site_panel_id inspection_date


drop tmpt mintmpt ismintmpt unconventional ID State County FIPS Division Region CBSA CBSA_Status MetroDiv CSA PopCen00 PopBase00 Pop00 NonmetRec04 Retirement04 UrbanInf03 RuralUrban03 CA05N0071_05 JanSun JulHumid Typography
compress


expand 2, gen(isExp2)
replace inspection_date = inspection_date - 30 if isExp2
expand 2 if !isExp1 & !isExp2, gen(isExp3)
replace inspection_date = inspection_date - 60 if isExp3
expand 2 if !isExp1 & !isExp2 & !isExp3, gen(isExp4)
replace inspection_date = inspection_date - 365 if isExp4
expand 2 if !isExp1 & !isExp2 & !isExp3 & !isExp4, gen(isExp5)
replace inspection_date = inspection_date - 730 if isExp5


expand 2 if !isExp1 & !isExp2 & !isExp3 & !isExp4 & !isExp5, gen(isExp6)
replace inspection_date = inspection_date - (60+365) if isExp5
expand 2 if !isExp1 & !isExp2 & !isExp3 & !isExp4 & !isExp5 & !isExp6, gen(isExp7)
replace inspection_date = inspection_date - (60+730) if isExp5


gen isExp = isExp1 | isExp2 | isExp3 | isExp4 | isExp5 | isExp6 | isExp7

duplicates tag api inspection_date, gen(duptag)
drop if duptag & isExp
drop duptag

replace isInspection = 0 if isExp





replace isInspection = 0 if missing(isInspection)
bysort operator_id inspection_date : gen int num_oper_wells = sum(isFirstInspSite)+1

carryforward operator_inspection_num county_id district, replace

replace fine = 0 if missing(fine)

sort operator_id inspection_date
by operator_id  : gen int oper_insp_num = sum(isInsp )  
gen oper_insp_num_sm = oper_insp_num / 1000


bysort api inspection_date: gen tmpt = _n
by api : egen mintmpt = min(tmpt )
gen ismintmpt = mintmpt == tmpt 
sort operator_id inspection_date
by operator_id : gen cum_num_wells = sum(ismintmpt ) 
gen operator_inspection_rate = operator_inspection_num / cum_num_wells 
drop mintmpt tmpt ismintmpt

sort county_id  inspection_date
by county_id: gen county_inspection_num = sum(isInspection)
sort site_panel_id  inspection_date
by site_panel_id: gen site_inspection_num = sum(isInspection)
gen general_deterrence_tally = county_inspection_num - site_inspection_num 

sum operator_inspection_rate  
gen specific_deter_z = (operator_inspection_rate -  r(mean)) / r(sd) 
sum general_deterrence_tally
gen general_deter_z = (general_deterrence_tally  -  r(mean)) / r(sd) 


sort operator_id inspection_date
by operator_id: gen cum_fines = sum(fine)
gen spec_fines_z = (cum_fines - r(mean) )/r(sd)

sort county_id  inspection_date
by county_id: gen cum_county_fines = sum(fine)
by county_id: gen num_county_fines = sum(isFine)

sort county_id operator_id inspection_date
by county_id operator_id: gen oper_co_cum_fine = sum(fine)
by county_id operator_id: gen oper_co_num_fine = sum(isFine)
gen general_deterrence_fine = (cum_county_fines - oper_co_cum_fine) / (num_county_fines-oper_co_num_fine)
sum general_deterrence_fine
gen general_deterrence_fine_z = (general_deterrence_fine -  r(mean)) / r(sd) 




sort district operator_id inspection_date
by district operator_id: gen operator_inspection_num_district = sum(isInspection)
gen operator_inspection_num_dist_sm = operator_inspection_num_district/1000

gen opern_specific_insp_district = specific_deter_z * operator_inspection_num_dist_sm
gen opern_gen_insp_district = general_deter_z  * operator_inspection_num_dist_sm
gen opern_spec_fine_district = spec_fines_z  * operator_inspection_num_dist_sm
gen opern_gen_fine_district = general_deterrence_fine_z   * operator_inspection_num_dist_sm

egen int oper_first_insp_date_district = min(inspection_date), by(operator_id district)
gen int days_operating_district = inspection_date - oper_first_insp_date_district
gen int oper_years_operating_district = days_operating_district / 365

gen years_specific_insp_district = specific_deter_z * oper_years_operating_district
gen years_gen_insp_district = general_deter_z  * oper_years_operating_district
gen years_spec_fine_district = spec_fines_z  * oper_years_operating_district
gen years_gen_fine_district = general_deterrence_fine_z   * oper_years_operating_district
   
 
/******************
regressions
First is lagged specifications
Then short term specifications
Then short term lagged specifications
******************/
gen found_violation100 = found_violation*100

//lagged specifications
xtset api inspection_date 
gen l60site_inspection_num = l60.site_inspection_num 
gen l60specific_deter_z = l60.specific_deter_z 
gen l60general_deter_z  = l60.general_deter_z  
gen l60spec_fines_z =l60.spec_fines_z
gen l60general_deterrence_fine_z =l60.general_deterrence_fine_z 
gen l60opern_specific_insp_district   =l60.opern_specific_insp_district  
gen l60opern_gen_insp_district   =l60.opern_gen_insp_district 
gen l60opern_spec_fine_district   =l60.opern_spec_fine_district
gen l60opern_gen_fine_district =  l60.opern_gen_fine_district
gen l60oper_inspection_num_dist_sm = l60.operator_inspection_num_dist_sm

gen l60years_specific_insp_district   =l60.years_specific_insp_district  
gen l60years_gen_insp_district   =l60.years_gen_insp_district 
gen l60years_spec_fine_district   =l60.years_spec_fine_district
gen l60years_gen_fine_district =  l60.years_spec_fine_district
gen l60oper_years_district = l60.oper_years_operating_district



gen l365site_inspection_num = l365.site_inspection_num 
gen l365specific_deter_z = l365.specific_deter_z 
gen l365general_deter_z  = l365.general_deter_z  
gen l365spec_fines_z =l365.spec_fines_z
gen l365general_deterrence_fine_z =l365.general_deterrence_fine_z 
gen l365opern_specific_insp_district   =l365.opern_specific_insp_district  
gen l365opern_gen_insp_district   =l365.opern_gen_insp_district 
gen l365opern_spec_fine_district   =l365.opern_spec_fine_district
gen l365opern_gen_fine_district =  l365.opern_gen_fine_district
gen l365oper_inspection_num_dist_sm = l365.operator_inspection_num_dist_sm

gen l365years_specific_insp_district   =l365.years_specific_insp_district  
gen l365years_gen_insp_district   =l365.years_gen_insp_district 
gen l365years_spec_fine_district   =l365.years_spec_fine_district
gen l365years_gen_fine_district =  l365.years_spec_fine_district
gen l365oper_years_district = l365.oper_years_operating_district


//inspections based experience
xtset operator_id
xtreg found_violation100    isUnconventional l60site_inspection_num l60specific_deter_z l60general_deter_z  l60spec_fines_z l60general_deterrence_fine_z l60opern_specific_insp_district   l60opern_gen_insp_district   l60opern_spec_fine_district   l60opern_gen_fine_district   l60oper_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isInspection, fe robust
eststo insp_oper_l60

xtreg found_violation100    isUnconventional l365site_inspection_num l365specific_deter_z l365general_deter_z  l365spec_fines_z l365general_deterrence_fine_z l365opern_specific_insp_district   l365opern_gen_insp_district   l365opern_spec_fine_district   l365opern_gen_fine_district   l365oper_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isInspection, fe robust
eststo insp_oper_l365



//time based experience

xtset operator_id
xtreg found_violation100    isUnconventional l60site_inspection_num l60specific_deter_z l60general_deter_z  l60spec_fines_z l60general_deterrence_fine_z l60years_specific_insp_district   l60years_gen_insp_district   l60years_spec_fine_district   l60years_gen_fine_district   l60oper_years_district  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isInspection, fe robust
eststo years_oper_l60

xtreg found_violation100    isUnconventional l365site_inspection_num l365specific_deter_z l365general_deter_z  l365spec_fines_z l365general_deterrence_fine_z l365years_specific_insp_district   l365years_gen_insp_district   l365years_spec_fine_district   l365years_gen_fine_district   l365oper_years_district i.moy   i.trend i.inspection_type i.county_id isFirstInspSite  if isInspection, fe robust
eststo years_oper_l365


//hand-copy coeffs into the robustness table in tex
esttab insp_oper_l60   years_oper_l60 ,  ///
	keep(*general_deter_z *gen_insp_district) se(3) b(3) ///
	 star(* 0.10 ** 0.05 *** 0.01) label nogaps 
	 
esttab insp_oper_l365   years_oper_l365 ,  ///
	keep(*general_deter_z *gen_insp_district) se(3) b(3) ///
	 star(* 0.10 ** 0.05 *** 0.01) label nogaps 
	
	
	
	
	
	
	
	
	
/**********************
Table 6
***********************/

	
	
use  "$data\cleaned_data.dta", clear
xi: reg found_violation    isUnconventional site_inspection_num specific_deter_z general_deter_z  spec_fines_z general_deterrence_fine_z opern_specific_insp_district   opern_gen_insp_district   opern_spec_fine_district   opern_gen_fine_district   operator_inspection_num_dist_sm  i.moy   i.trend i.inspection_type i.county_id isFirstInspSite   millsratio if isInspection,
keep if e(sample)

//tests for whether violations by experienced firms are of different type
//set up variables
gen byte isSpill = strpos(violation_code , "discharge") | strpos(violation_code , "release")
gen byte isPlan =  strpos(violation_code , "102.4INADPLN")  |  strpos(violation_code , "102.4NOPLAN ")
gen byte isPermitSign = strpos(violation_code, "201TAG") | strpos(violation_code, "201G") | strpos(violation_code, "201H")

egen oper_first_insp_date_district = min(inspection_date), by(operator_id district)
gen days_operating_district = inspection_date - oper_first_insp_date_district
gen oper_years_operating_district = days_operating_district / 365


qui: sum  oper_years_operating_district, de
gen isExperienced = oper_years_operating_district > r(p75)
gen isInExperienced = oper_years_operating_district < r(p25)
label variable isExperienced "Experienced Firm"

sum isSpill isPlan isPermitSign
xtset operator_id 
set matsize 11000

//run regressions
//report treatment coeff as odds ratio
melogit isSpill    isExperienced  i.year i.moy i.county_id || operator_id: , cov(unstruct) diff tech(dfp) or //seems more computationally stable
eststo water_oper_discrete
sum isSpill
estadd scalar prob = r(mean)

melogit isPlan    isExperienced  i.year i.moy i.county_id || operator_id: , cov(unstruct) diff tech(dfp) or //seems more computationally stable
eststo plan_oper_discrete
sum isPlan
estadd scalar prob = r(mean)

melogit isPermitSign    isExperienced  i.year i.moy i.county_id || operator_id: , cov(unstruct) diff tech(dfp) or //seems more computationally stable
eststo sign_oper_discrete
sum isPermitSign
estadd scalar prob = r(mean)

esttab water_oper_discrete plan_oper_discrete sign_oper_discrete, stats(prob N) keep(isExperienced) b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps nomtitle eform
esttab water_oper_discrete plan_oper_discrete sign_oper_discrete, stats(prob N) keep(isExperienced) b(3) se(3) star(* 0.10 ** 0.05 *** 0.01) label nogaps nomtitle eform tex

	
	
/**********************
THIS IS THE END
***********************/



